
#.git log --pretty=format:%cI,%h,%an,%ae,%s > C:\Users\ctobr\OneDrive\DATA_Science\1_farina_in_natura\COMMIT_LOG\commits.csv
# Código UBUNTU GIT LOG
# 1) git log --pretty=format:%cI,%h,%an,%ae,%s > /home/claudio/Farina001_GIT_LOG/commits.csv
# 2) abrir excel e e na aba dados, imrtar o arquivo gerado pelo código acima
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns; sns.set()
import datetime
import nbformat
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode (connected = True )
from IPython.display import display
import plotly.graph_objs as go
import plotly.offline as py
from matplotlib.backends.backend_pdf import PdfPages
from fpdf import FPDF
from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows = None
import locale
# para resolver o problema: "(unicode error) 'utf-8' codec can't decode "
# https://www.youtube.com/watch?v=ZSC7X4N8RKo,
#Erro:
# UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 900: invalid continuation byte
# enem = pd.read_csv("MICRODADOS_ENEM_2020.csv", encoding="utf-8", sep = ';')
# enem = pd.read_csv("MICRODADOS_ENEM_2020.csv", encoding="ISO-8859-1", sep = ';')
transactions = pd.read_csv(r'/home/claudio/farina_in_natura001/CSV/farina.CSV',
encoding="ISO-8859-1", sep = ';')
transactions.head()
| venda | data_hora | cliente | mesa | item | produto | quantidade | valor_unit | desconto | total_item | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 460.0 | 01/02/2022 08:13 | CONSUMIDOR | M 1 | 119.0 | SUCO DE LARANJA 400ML | 1.0 | 14,5 | 0.0 | 14,5 |
| 1 | 460.0 | 01/02/2022 08:13 | CONSUMIDOR | M 1 | 121.0 | PAO SOURDOUGH OVOS | 1.0 | 15,5 | 0.0 | 15,5 |
| 2 | 461.0 | 01/02/2022 08:14 | CONSUMIDOR | M 2 | 119.0 | SUCO DE LARANJA 400ML | 1.0 | 14,5 | 0.0 | 14,5 |
| 3 | 461.0 | 01/02/2022 08:14 | CONSUMIDOR | M 2 | 121.0 | PAO SOURDOUGH OVOS | 1.0 | 15,5 | 0.0 | 15,5 |
| 4 | 465.0 | 01/02/2022 09:01 | CONSUMIDOR | M 1 | 46.0 | QUICHE BACON 90G | 2.0 | 19,5 | 0.0 | 39 |
transactions.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 25998 entries, 0 to 25997 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 venda 25992 non-null float64 1 data_hora 25992 non-null object 2 cliente 25992 non-null object 3 mesa 22779 non-null object 4 item 25922 non-null float64 5 produto 25992 non-null object 6 quantidade 25992 non-null float64 7 valor_unit 25992 non-null object 8 desconto 25992 non-null float64 9 total_item 25992 non-null object dtypes: float64(4), object(6) memory usage: 2.0+ MB
### CHECK OUT IF THE NUMBER OF TOTAL ENTRIES IS EQUAL TO THE NUMBER OF ENTRIES OF THE VARIABLES
transactions.isnull().sum()
venda 6 data_hora 6 cliente 6 mesa 3219 item 76 produto 6 quantidade 6 valor_unit 6 desconto 6 total_item 6 dtype: int64
## DROPPING THE ROWS THAT ALL COLUMN VALUES ARE NULL
# https://www.youtube.com/watch?v=uDr67HBIPz8&t=818s
transactions = transactions.dropna(how='all')
# Delete the column "item"
del transactions["item"]
## Using isna() to select all rows with NaN in the column ('mesa') and the value "CONSUMIDOR" on the column "cliente"
## and replace the "mesa" value with "M0"
## https://datatofish.com/rows-with-nan-pandas-dataframe/
## https://www.geeksforgeeks.org/how-to-replace-values-in-column-based-on-condition-in-pandas/
transactions.loc[(transactions["cliente"] == 'CONSUMIDOR') & (transactions['mesa'].isna()), 'mesa']= 'M0'
# Checking if the changes were correct.
transactions.loc[transactions["mesa"] == 'M0']
| venda | data_hora | cliente | mesa | produto | quantidade | valor_unit | desconto | total_item | |
|---|---|---|---|---|---|---|---|---|---|
| 121 | 520.0 | 02/02/2022 16:57 | CONSUMIDOR | M0 | BOMBOLONI NUTELLA 80G | 2.0 | 12 | 0.0 | 24 |
| 122 | 520.0 | 02/02/2022 16:57 | CONSUMIDOR | M0 | TARTELETE DE MORANGO 120G | 1.0 | 18 | 0.0 | 18 |
| 123 | 520.0 | 02/02/2022 16:57 | CONSUMIDOR | M0 | CROISSANT MUSSATRELA DE | 1.0 | 25,5 | 0.0 | 25,5 |
| 894 | 856.0 | 11/02/2022 19:19 | CONSUMIDOR | M0 | CROISSANT BRIE PARMA MEL DE | 2.0 | 25,5 | 0.0 | 51 |
| 895 | 856.0 | 11/02/2022 19:19 | CONSUMIDOR | M0 | VH SANTA HORTENSIA SAUVIG | 1.0 | 59 | 0.0 | 59 |
# Assigning a value "ND" to the empty records in column "mesa"
# https://www.w3schools.com/python/pandas/pandas_cleaning_empty_cells.asp
transactions['mesa'].fillna("ND",inplace = True )
# Convert the "venda" variable from float64 type to integer type
# https://stackoverflow.com/questions/43956335/convert-float64-column-to-int64-in-pandas
transactions['venda'] = np.int64(transactions['venda'])
# Convert the "quantidade" variable from float64 type to integer type
# https://stackoverflow.com/questions/43956335/convert-float64-column-to-int64-in-pandas
transactions['quantidade'] = np.int64(transactions['quantidade'])
# change the decimal separator of the the variable "valor_unit" from "," to "."
# https://cursos.alura.com.br/forum/topico-transformar-pontos-por-virgulas-dentro-de-um-dataframe-121090
transactions['valor_unit'] = transactions['valor_unit'].str.replace(",",".")
# Convert the "valor_uni" variable from object type to float64 type
# https://www.statology.org/pandas-convert-object-to-float/
transactions['valor_unit'] = transactions['valor_unit'].astype(float)
# change the decimal delimiter of the the variable "total_item" from "," to "."
# https://cursos.alura.com.br/forum/topico-transformar-pontos-por-virgulas-dentro-de-um-dataframe-121090
transactions['total_item'] = transactions['total_item'].str.replace(",",".")
# Convert the "total_item" variable from object type to float64 type
# https://www.statology.org/pandas-convert-object-to-float/
transactions['total_item'] = transactions['total_item'].astype(float)
# Convert "data_hora" to datetime object
transactions['data_hora'] = pd.to_datetime(transactions['data_hora'], format='%d/%m/%Y %H:%M')
# Creating a horario column that is a copy of data_hora and coverting it to datetime and extracting only the day;
transactions['horario'] = (transactions['data_hora'])
transactions['horario'] = transactions['horario'].dt.date
#How to Solve Python ValueError: unconverted data remains
#read://https_researchdatapod.com/?url=https%3A%2F%2Fresearchdatapod.com%2Fhow-to-solve-python-valueerror-unconverted-data-remains%2F
transactions.head()
| venda | data_hora | cliente | mesa | produto | quantidade | valor_unit | desconto | total_item | horario | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 460 | 2022-02-01 08:13:00 | CONSUMIDOR | M 1 | SUCO DE LARANJA 400ML | 1 | 14.5 | 0.0 | 14.5 | 2022-02-01 |
| 1 | 460 | 2022-02-01 08:13:00 | CONSUMIDOR | M 1 | PAO SOURDOUGH OVOS | 1 | 15.5 | 0.0 | 15.5 | 2022-02-01 |
| 2 | 461 | 2022-02-01 08:14:00 | CONSUMIDOR | M 2 | SUCO DE LARANJA 400ML | 1 | 14.5 | 0.0 | 14.5 | 2022-02-01 |
| 3 | 461 | 2022-02-01 08:14:00 | CONSUMIDOR | M 2 | PAO SOURDOUGH OVOS | 1 | 15.5 | 0.0 | 15.5 | 2022-02-01 |
| 4 | 465 | 2022-02-01 09:01:00 | CONSUMIDOR | M 1 | QUICHE BACON 90G | 2 | 19.5 | 0.0 | 39.0 | 2022-02-01 |
transactions.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 25992 entries, 0 to 25997 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 venda 25992 non-null int64 1 data_hora 25992 non-null datetime64[ns] 2 cliente 25992 non-null object 3 mesa 25992 non-null object 4 produto 25992 non-null object 5 quantidade 25992 non-null int64 6 valor_unit 25992 non-null float64 7 desconto 25992 non-null float64 8 total_item 25992 non-null float64 9 horario 25992 non-null object dtypes: datetime64[ns](1), float64(3), int64(2), object(4) memory usage: 2.2+ MB
# https://www.dataquest.io/blog/python-datetime-tutorial/
# add hora and minuto column; using class attributes .hour and .minute
transactions['hora'] = transactions.data_hora.dt.hour
transactions['minuto'] = transactions.data_hora.dt.minute
# https://gist.github.com/krisbolton/aef6179ecf906892565d326b979b3a5b
# https://www.youtube.com/watch?v=TbXaP56Omuc
transactions['hora_minuto']=pd.to_datetime(transactions['data_hora'])
transactions['hora_minuto'] = transactions['hora_minuto'].dt.time
transactions['data'] = transactions['data_hora'].dt.normalize()
## https://datagy.io/pandas-extract-date-from-datetime/
transactions['data'] = transactions['data_hora'].dt.normalize()
# Definindo a função time_of_day
def time_of_day(hora):
"""
Determina se a compra foi feita pela manhã, tarde ou noite.
"""
if hora < 12:
return 'manhã'
elif hora < 17:
return 'tarde'
else:
return 'noite'
# Apply time of day function to 'hora' column
transactions['time_of_day'] = transactions.hora.apply(time_of_day)
# https://pt.stackoverflow.com/questions/508484/mostrar-dias-da-semana#:~:text=Como%20weekday()%20retorna%20um,fim%20de%20semana%20ou%20n%C3%A3o.
# setar locale para português para que os dias da semana retornem em portugês no código abaixo.
locale.setlocale(locale.LC_ALL, 'pt_BR.utf-8')
'pt_BR.utf-8'
# Add a day of week variable and a second classifier of weekday or weekend
from datetime import date
import calendar
transactions['day_of_week'] = [calendar.day_name[i.weekday()] for i in transactions['horario']]
transactions['day_type'] = ['FDS' if (i == 'domingo') | (i == 'sábado') else 'DDS' for i in
transactions['day_of_week']]
transactions.rename(columns = {'day_of_week':'dia_da_semana'}, inplace = True)
transactions.rename(columns = {'time_of_day':'hora_do_dia'}, inplace = True)
transactions.rename(columns = {'day_type':'tipo_de_dia'}, inplace = True)
# Add an item count variable equal to one for future groupby operations
transactions['item_count'] = 1
transactions.head()
| venda | data_hora | cliente | mesa | produto | quantidade | valor_unit | desconto | total_item | horario | hora | minuto | hora_minuto | data | hora_do_dia | dia_da_semana | tipo_de_dia | item_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 460 | 2022-02-01 08:13:00 | CONSUMIDOR | M 1 | SUCO DE LARANJA 400ML | 1 | 14.5 | 0.0 | 14.5 | 2022-02-01 | 8 | 13 | 08:13:00 | 2022-02-01 | manhã | terça | DDS | 1 |
| 1 | 460 | 2022-02-01 08:13:00 | CONSUMIDOR | M 1 | PAO SOURDOUGH OVOS | 1 | 15.5 | 0.0 | 15.5 | 2022-02-01 | 8 | 13 | 08:13:00 | 2022-02-01 | manhã | terça | DDS | 1 |
| 2 | 461 | 2022-02-01 08:14:00 | CONSUMIDOR | M 2 | SUCO DE LARANJA 400ML | 1 | 14.5 | 0.0 | 14.5 | 2022-02-01 | 8 | 14 | 08:14:00 | 2022-02-01 | manhã | terça | DDS | 1 |
| 3 | 461 | 2022-02-01 08:14:00 | CONSUMIDOR | M 2 | PAO SOURDOUGH OVOS | 1 | 15.5 | 0.0 | 15.5 | 2022-02-01 | 8 | 14 | 08:14:00 | 2022-02-01 | manhã | terça | DDS | 1 |
| 4 | 465 | 2022-02-01 09:01:00 | CONSUMIDOR | M 1 | QUICHE BACON 90G | 2 | 19.5 | 0.0 | 39.0 | 2022-02-01 | 9 | 1 | 09:01:00 | 2022-02-01 | manhã | terça | DDS | 1 |
# Verificando se o valor "total_item" é igual a: "quantidade" * "valor_unit"
transactions["total_item_verif"] = transactions['quantidade']* transactions['valor_unit']
# Continuando o item anterior
# https://www.dataquest.io/blog/tutorial-add-column-pandas-dataframe-based-on-if-else-condition/
transactions['confirm'] = np.where(transactions['total_item'] > transactions['total_item_verif'], True, False)
transactions['diference']=(transactions['total_item'])-(transactions['total_item_verif'])
# continuando o item anterior
resumo = transactions.loc[transactions["confirm"] == True, ["total_item", "total_item_verif", 'diference']].sum()
print (resumo)
total_item 53878.3 total_item_verif 22219.1 diference 31659.2 dtype: float64
# How do I select specific rows and columns from a DataFrame?
# https://pandas.pydata.org/docs/getting_started/intro_tutorials/03_subset_data.html#how-do-i-select-specific-rows-and-columns-from-a-dataframe
# https://www.geeksforgeeks.org/select-rows-columns-by-name-or-index-in-pandas-dataframe-using-loc-iloc/
ts2 = transactions.loc[transactions["confirm"] == True, ["venda", "data","produto", "quantidade", "valor_unit", "total_item", "total_item_verif"]]
ts2.head()
| venda | data | produto | quantidade | valor_unit | total_item | total_item_verif | |
|---|---|---|---|---|---|---|---|
| 7039 | 3571 | 2022-04-29 | CROISSANT | 1 | 10.0 | 25.5 | 10.0 |
| 7047 | 3575 | 2022-04-29 | CROISSANT | 1 | 10.0 | 28.9 | 10.0 |
| 7054 | 3582 | 2022-04-29 | CROISSANT | 1 | 10.0 | 25.5 | 10.0 |
| 7072 | 3599 | 2022-04-30 | CROISSANT | 1 | 10.0 | 19.5 | 10.0 |
| 7074 | 3600 | 2022-04-30 | CROISSANT IFOOD | 1 | 12.0 | 51.0 | 12.0 |
/home/claudio/results/Dados_divergentes_vendas.html
/home/claudio/farina_in_natura001/CSV
fig_divergentes = go.Figure(data=[go.Table(
header=dict(values=list(ts2.columns),
font=dict(color='white', size=12), # configurando a cor e tamanho da letra
fill_color='seagreen',
align='left'),
cells=dict(values=[ts2['venda'], ts2['data'], ts2['produto'], ts2['quantidade'], ts2['valor_unit'], ts2['total_item'], ts2['total_item_verif']],
fill_color='lightcyan',
align='left'))
])
fig_divergentes.update_layout(
title={
'text': "Dados divergentes - Valores de venda - (Fev 2022 - Out 22)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'})
py.iplot(fig_divergentes, filename='/home/claudio/results/grafico01 - Dados_divergentes_vendas.html')
# drop the 'confirm" and "total_item_verif" column
transactions = transactions.drop(['confirm'], axis=1)
transactions = transactions.drop(['total_item_verif'], axis=1)
transactions = transactions.drop (['diference'], axis=1)
Exploração dos Dados¶
# Find Unique Values of each item sold in the Column "produto"
# https://www.statology.org/pandas-unique-values-in-column/
# Pandas: get first 10 elements of a series
top_produtos = transactions.produto.value_counts()
print (top_produtos[:10])
CROISSANT 2393 PASTEL DE NATA 60G 1339 CROISSANT AMENDOAS 100G 742 FOLHADO MACA 80G 719 SANDUICHE DE PEPERONI BRIE 596 COOKIES 80G 559 CROISSANT IFOOD 534 SOURDOUGH AZEITONA E ALECRIM 517 FOLHADO FRANGO 160G 500 BOMBOLONI DOCE DE LEITE 80G 487 Name: produto, dtype: int64
top_produtos_df = top_produtos.to_frame()
top_produtos_df = top_produtos_df.reset_index()
top_produtos_df.rename(columns = {'produto':'quantidade', 'index':'produto'}, inplace = True)
top_produtos_df.head()
| produto | quantidade | |
|---|---|---|
| 0 | CROISSANT | 2393 |
| 1 | PASTEL DE NATA 60G | 1339 |
| 2 | CROISSANT AMENDOAS 100G | 742 |
| 3 | FOLHADO MACA 80G | 719 |
| 4 | SANDUICHE DE PEPERONI BRIE | 596 |
fig_top_produtos = go.Figure(data=[go.Table(
header=dict(values=list(top_produtos_df.columns),
font=dict(color='white', size=12), # configurando a cor e tamanho da letra
fill_color='seagreen',
align='left'),
cells=dict(values=[top_produtos_df['produto'], top_produtos_df['quantidade'] ],
fill_color='lightcyan',
align='left'))
])
fig_top_produtos.update_layout(
title={
'text': "Produtos mais vendidos - (Fev 2022 - Out 22)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'})
py.iplot(fig_top_produtos, filename='/home/claudio/results/grafico02 - Top_products_2022.html')
# find the percentage each item is selling
top_produtos_perc = transactions.produto.value_counts(True)
print (top_produtos_perc[:10])
CROISSANT 0.092067 PASTEL DE NATA 60G 0.051516 CROISSANT AMENDOAS 100G 0.028547 FOLHADO MACA 80G 0.027662 SANDUICHE DE PEPERONI BRIE 0.022930 COOKIES 80G 0.021507 CROISSANT IFOOD 0.020545 SOURDOUGH AZEITONA E ALECRIM 0.019891 FOLHADO FRANGO 160G 0.019237 BOMBOLONI DOCE DE LEITE 80G 0.018737 Name: produto, dtype: float64
top_produtos_perc_df = top_produtos_perc.to_frame().reset_index()
top_produtos_perc_df.rename(columns = {'produto':'proporção', 'index':'produto'}, inplace = True)
top_produtos_perc_df.head()
| produto | proporção | |
|---|---|---|
| 0 | CROISSANT | 0.092067 |
| 1 | PASTEL DE NATA 60G | 0.051516 |
| 2 | CROISSANT AMENDOAS 100G | 0.028547 |
| 3 | FOLHADO MACA 80G | 0.027662 |
| 4 | SANDUICHE DE PEPERONI BRIE | 0.022930 |
top_produtos_perc_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 212 entries, 0 to 211 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 produto 212 non-null object 1 proporção 212 non-null float64 dtypes: float64(1), object(1) memory usage: 3.4+ KB
# https://www.dataquest.io/blog/tutorial-add-column-pandas-dataframe-based-on-if-else-condition/
# if a prduct has a percentage greater than 1,7%, its ranked,
# if lower than this its aggregate with 'outros'
top_produtos_perc_df['ranking']= np.where (
top_produtos_perc_df['proporção']> 0.017,
top_produtos_perc_df['produto'], 'outros' )
fig_top_produtos_perc = go.Figure(data=[go.Table(
header=dict(values=list(top_produtos_perc_df.columns),
font=dict(color='white', size=12), # configurando a cor e tamanho da letra
fill_color='seagreen',
align='left'),
cells=dict(values=[top_produtos_perc_df['produto'],
top_produtos_perc_df['proporção'],
top_produtos_perc_df['ranking'] ],
fill_color='lightcyan',
align='left'))
])
fig_top_produtos_perc.update_layout(
title={
'text': "Produtos mais vendidos proporcional - (Fev 2022 - Out 22)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'})
py.iplot(fig_top_produtos_perc, filename='/home/claudio/results/grafico03 - top_products_proporc_2022.html')
fig_pie_3 = go.Figure(data=[go.Pie(labels=top_produtos_perc_df['ranking'], values=top_produtos_perc_df['proporção'])])
fig_pie_3.layout = dict(title="Percetual dos itens mais vendidos (Fev 2022 - Out 22) <br><sup></sup>", showlegend=True
)
py.iplot(fig_pie_3, filename='/home/claudio/results/grafico03A - Percetual_dos_itens_mais_vendidos.html')
## Get unique names in the "produto" column using Series.unique()
## https://sparkbyexamples.com/pandas/pandas-count-unique-values-in-column/
num_itens_vend = transactions.produto.unique().size
print('O número de diferentes itens vendido pela loja é:', num_itens_vend)
O número de diferentes itens vendido pela loja é: 212
# Look at the average number of items purchased...
avg_items = transactions.groupby(by='venda')['produto'].count().mean()
print('A média de itens por venda é: %.2f itens' %avg_items)
A média de itens por venda é: 2.79 itens
# ...and how that varies by time of day
single_transaction = pd.DataFrame(transactions.groupby(by='venda')['item_count'].sum())
single_transaction = single_transaction.merge(transactions[['hora','minuto','hora_do_dia','data', 'venda']]\
.drop_duplicates('venda'), left_on=single_transaction.index,
right_on='venda', how='left', copy=False)
print('O número médio de itens vendidos por período do dia é: \n{0}'.format(
single_transaction.groupby(by='hora_do_dia')['item_count'].mean()))
O número médio de itens vendidos por período do dia é: hora_do_dia manhã 3.046419 noite 2.663510 tarde 2.783376 Name: item_count, dtype: float64
# Find the frequency of each item purchased and by time of day
item_frequencies_all = transactions.produto.value_counts()
item_frequencies_morning = transactions[transactions.hora_do_dia == 'manhã'].produto.value_counts()
item_frequencies_afternoon = transactions[transactions.hora_do_dia == 'tarde'].produto.value_counts()
item_frequencies_evening = transactions[transactions.hora_do_dia == 'noite'].produto.value_counts()
# Plot the frequency of purchase for each item
# https://community.plotly.com/t/errors-accessing-plotly-api/33271
# ok, so I found a solution to my problem - when I let my program run through, the error message produced is:
#chart_studio.exceptions.PlotlyRequestError: Authentication credentials were not provided.
#I Googled this error and there were suggestions to run plotly in offline mode by replacing:
# Solving the problem of the 'plotly' online library with chart_studio
# https://plotly.com/python/getting-started-with-chart-studio/
## Now I'm using Plotly Express (px) and Graphics Objects (go) # 13/03/2023
button_layer_1_height = 1.12
button_layer_2_height = 1.065
item_threshold_all = 225
item_threshold_other = 125
# https://plotly.com/python/bar-charts/#basic-bar-charts-with-plotlygraphobjects
# Customizing Individual Bar Colors
# Cores das barras todo dia
colors1 = ['seagreen',] * len(transactions)
colors1[13]= 'crimson' # destacar a venda de café
# Cores das barras manhã
colors2 = ['seagreen',] * len(transactions)
colors2[12]= 'crimson' # destacar venda de café
# Cores das barras tarde
colors3 = ['seagreen',] * len(transactions)
colors3[11]= 'crimson' # destacar venda de café
# Cores das barras noite
colors4 = ['seagreen',] * len(transactions)
colors4[18]= 'crimson' # destacar venda de café
# https://plotly.com/python/bar-charts/#basic-bar-charts-with-plotlygraphobjects - customizando as barras
trace_all = go.Bar(
x=item_frequencies_all[item_frequencies_all > item_threshold_all].index,
y=item_frequencies_all[item_frequencies_all > item_threshold_all].values,
visible=True,
marker_color=colors1,
name='Dia Inteiro'
)
trace_morning = go.Bar(
x=item_frequencies_morning[item_frequencies_morning > item_threshold_other].index,
y=item_frequencies_morning[item_frequencies_morning > item_threshold_other].values,
visible=False,
marker_color= colors2,
name='Manhã'
)
trace_afternoon = go.Bar(
x=item_frequencies_afternoon[item_frequencies_afternoon > item_threshold_other].index,
y=item_frequencies_afternoon[item_frequencies_afternoon > item_threshold_other].values,
visible=False,
marker_color=colors3,
name='Tarde'
)
trace_evening = go.Bar(
x=item_frequencies_evening[item_frequencies_evening > item_threshold_other].index,
y=item_frequencies_evening[item_frequencies_evening > item_threshold_other].values,
visible=False,
marker_color= colors4,
name='Noite'
)
data04 = [trace_all, trace_morning, trace_afternoon, trace_evening]
# Inserir um update _layout para colocar um subtitle
# https://stackoverflow.com/questions/58166002/how-to-add-caption-subtitle-using-plotly-method-in-python
updatemenus = list([
dict(
buttons=list([
dict(label = 'Dia inteiro',
method = 'update',
args = [{'visible': [True, False, False, False]},
{'title': 'Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Dia inteiro - produtos com mais de 225 itens vendidos</sup>'}]),
dict(label = 'Manhã',
method = 'update',
args = [{'visible': [False, True, False, False]},
{'title': 'Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Manhã - produtos com mais de 125 itens vendidos</sup>'}]),
dict(label = 'Tarde',
method = 'update',
args = [{'visible': [False, False, True, False]},
{'title': 'Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Tarde - produtos com mais de 125 itens vendidos</sup>'}]),
dict(label = 'Noite',
method = 'update',
args = [{'visible': [False, False, False, True]},
{'title': 'Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Noite - produtos com mais de 125 itens vendidos</sup>'}])
]),
type='buttons',
direction = 'right',
pad = {'r': 10, 't': 10},
showactive = True,
x = 0.6, # range from -2 to 3
xanchor = 'left',
y = button_layer_1_height,
yanchor = 'top' )
])
layout04 = dict(title="Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Dia inteiro -- produtos com mais de 225 itens vendidos</sup>", showlegend=False,
updatemenus=updatemenus)
# to change the background color
layout04.update(
plot_bgcolor='lightcyan',
xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
fig04 = dict(data=data04, layout=layout04)
py.iplot(fig04, filename='/home/claudio/results/grafico04 - Frequencia_vendas_periodo_dia.html')
f_filter = (transactions['data']!='2022-06-23')#&(transactions['data']!='2022-07-30')
transactions[f_filter]
transactions_filtrada = transactions[f_filter]
transactions_filtrada['mes']=(transactions_filtrada['data'].dt.month)
/tmp/ipykernel_216/1953160630.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Find the frequency of each item purchased by month
item_frequencies_periodo = transactions_filtrada.produto.value_counts()
item_frequencies_jan = transactions_filtrada[transactions_filtrada.mes== 1].produto.value_counts()
item_frequencies_fev = transactions_filtrada[transactions_filtrada.mes== 2].produto.value_counts()
item_frequencies_mar = transactions_filtrada[transactions_filtrada.mes== 3].produto.value_counts()
item_frequencies_abr = transactions_filtrada[transactions_filtrada.mes== 4].produto.value_counts()
item_frequencies_mai = transactions_filtrada[transactions_filtrada.mes== 5].produto.value_counts()
item_frequencies_jun = transactions_filtrada[transactions_filtrada.mes== 6].produto.value_counts()
item_frequencies_jul = transactions_filtrada[transactions_filtrada.mes== 7].produto.value_counts()
item_frequencies_ago = transactions_filtrada[transactions_filtrada.mes== 8].produto.value_counts()
item_frequencies_set = transactions_filtrada[transactions_filtrada.mes== 9].produto.value_counts()
item_frequencies_out = transactions_filtrada[transactions_filtrada.mes== 10].produto.value_counts()
item_frequencies_nov = transactions_filtrada[transactions_filtrada.mes== 11].produto.value_counts()
item_frequencies_dez = transactions_filtrada[transactions_filtrada.mes== 12].produto.value_counts()
# Plot the frequency of purchase for each item by month
button_layer_1_height = 1.12
button_layer_2_height = 1.065
item_threshold_all = 225
item_threshold_other = 40
colors1 = ['seagreen',] * len(transactions)
trace_periodo = go.Bar(
x=item_frequencies_periodo[item_frequencies_periodo > item_threshold_all].index,
y=item_frequencies_periodo[item_frequencies_periodo > item_threshold_all].values, text=item_frequencies_periodo,
visible=True,
marker_color=colors1,
name='Todo periodo'
)
trace_jan = go.Bar(
x=item_frequencies_jan[item_frequencies_jan > item_threshold_other].index,
y=item_frequencies_jan[item_frequencies_jan > item_threshold_other].values,text=item_frequencies_jan,
visible=False,
marker_color=colors1,
name='Janeiro'
)
trace_fev = go.Bar(
x=item_frequencies_fev[item_frequencies_fev > item_threshold_other].index,
y=item_frequencies_fev[item_frequencies_fev > item_threshold_other].values,text=item_frequencies_fev,
visible=False,
marker_color=colors1,
name='Fevereiro'
)
trace_mar = go.Bar(
x=item_frequencies_mar[item_frequencies_mar > item_threshold_other].index,
y=item_frequencies_mar[item_frequencies_mar > item_threshold_other].values,text=item_frequencies_mar,
visible=False,
marker_color=colors1,
name='Março'
)
trace_abr = go.Bar(
x=item_frequencies_abr[item_frequencies_abr > item_threshold_other].index,
y=item_frequencies_abr[item_frequencies_abr > item_threshold_other].values,text=item_frequencies_abr,
visible=False,
marker_color=colors1,
name='Abril'
)
trace_mai = go.Bar(
x=item_frequencies_mai[item_frequencies_mai > item_threshold_other].index,
y=item_frequencies_mai[item_frequencies_mai > item_threshold_other].values,text=item_frequencies_mai,
visible=False,
marker_color=colors1,
name='Maio'
)
trace_jun = go.Bar(
x=item_frequencies_jun[item_frequencies_jun > item_threshold_other].index,
y=item_frequencies_jun[item_frequencies_jun > item_threshold_other].values,text=item_frequencies_jun,
visible=False,
marker_color=colors1,
name='Junho'
)
trace_jul = go.Bar(
x=item_frequencies_jul[item_frequencies_jul > 25].index,
y=item_frequencies_jul[item_frequencies_jul > 25].values,text=item_frequencies_jul,
visible=False,
marker_color=colors1,
name='Julho'
)
trace_ago = go.Bar(
x=item_frequencies_ago[item_frequencies_ago > item_threshold_other].index,
y=item_frequencies_ago[item_frequencies_ago > item_threshold_other].values,text=item_frequencies_ago,
visible=False,
marker_color=colors1,
name='Agosto'
)
trace_set = go.Bar(
x=item_frequencies_set[item_frequencies_set > item_threshold_other].index,
y=item_frequencies_set[item_frequencies_set > item_threshold_other].values,text=item_frequencies_set,
visible=False,
marker_color=colors1,
name='Setembro'
)
trace_out = go.Bar(
x=item_frequencies_out[item_frequencies_out > item_threshold_other].index,
y=item_frequencies_out[item_frequencies_out > item_threshold_other].values,text=item_frequencies_out,
visible=False,
marker_color=colors1,
name='Outubro'
)
trace_nov = go.Bar(
x=item_frequencies_nov[item_frequencies_nov > item_threshold_other].index,
y=item_frequencies_nov[item_frequencies_nov > item_threshold_other].values,text=item_frequencies_nov,
visible=False,
marker_color=colors1,
name='Novembro'
)
trace_dez = go.Bar(
x=item_frequencies_dez[item_frequencies_dez > item_threshold_other].index,
y=item_frequencies_dez[item_frequencies_dez > item_threshold_other].values,text=item_frequencies_dez,
visible=False,
marker_color=colors1,
name='Dezembro'
)
data_mes = [trace_periodo, trace_jan, trace_fev, trace_mar, trace_abr, trace_mai, trace_jun, trace_jul, trace_ago, trace_set, trace_out, trace_nov, trace_dez]
updatemenus2 = list([
dict
(
buttons=list([
dict(label = 'Todo Ano',
method = 'update',
args = [{'visible': [True, False, False, False, False, False, False, False, False, False, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Todo Ano</sup>'}]),
dict(label = 'Janeiro',
method = 'update',
args = [{'visible': [False, True, False, False, False, False, False, False, False, False, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Janeiro</sup>'}]),
dict(label = 'Fevereiro',
method = 'update',
args = [{'visible': [False, False, True, False, False, False, False, False, False, False, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Fevereiro</sup>'}]),
dict(label = 'Março',
method = 'update',
args = [{'visible': [False, False, False, True, False, False, False, False, False, False, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Março</sup>'}]),
dict(label = 'Abril',
method = 'update',
args = [{'visible': [False, False, False, False, True, False, False, False, False, False, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Abril</sup>'}]),
dict(label = 'Maio',
method = 'update',
args = [{'visible': [False, False, False, False, False, True, False, False, False, False, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Maio</sup>'}]),
dict(label = 'Junho',
method = 'update',
args = [{'visible': [False, False, False, False, False, False, True, False, False, False, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Junho</sup>'}]),
dict(label = 'Julho',
method = 'update',
args = [{'visible': [False, False, False, False, False, False, False, True, False, False, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Julho</sup>'}]),
dict(label = 'Agosto',
method = 'update',
args = [{'visible': [False, False, False, False, False, False, False, False, True, False, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Agosto</sup>'}]),
dict(label = 'Setembro',
method = 'update',
args = [{'visible': [False, False, False, False, False, False, False, False, False, True, False, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Setembro</sup>'}]),
dict(label = 'Outubro',
method = 'update',
args = [{'visible': [False, False, False, False, False, False, False, False, False, False, True, False, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Outubro</sup>'}]),
dict(label = 'Novembro',
method = 'update',
args = [{'visible': [False, False, False, False, False, False, False, False, False, False, False, True, False]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Novembro</sup>'}]),
dict(label = 'Dezembro',
method = 'update',
args = [{'visible': [False, False, False, False, False, False, False, False, False, False, False, False, True]},
{'title': 'Vendas Farina por item - ano 2022 <br><sup>Dezembro</sup>'}])
]),
type='buttons',
direction = 'right',
pad = {'r': 10, 't': 10},
showactive = True,
x = 0.3,
xanchor = 'left',
y = button_layer_1_height,
yanchor = 'top'
)
])
layout_mes = dict(title='Vendas Farina por item - ano 2022 <br><sup>Todo Ano</sup>', showlegend=False, updatemenus=updatemenus2)
# to change the background color
layout_mes.update(
plot_bgcolor='lightcyan',
xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
fig = dict(data=data_mes, layout=layout_mes)
py.iplot(fig, filename='/home/claudio/results/grafico05 - Frequencia_vendas_itens_mensal.html')
# Sales trend by date
# https://www.youtube.com/watch?v=Xjm7fOw6gjY
# use reset index for transforming the data in a variable not a index
grouped_by_date = pd.DataFrame(transactions.groupby(by='horario')['item_count'].sum().reset_index())
grouped_by_date.head()
| horario | item_count | |
|---|---|---|
| 0 | 2022-02-01 | 74 |
| 1 | 2022-02-02 | 88 |
| 2 | 2022-02-03 | 81 |
| 3 | 2022-02-04 | 110 |
| 4 | 2022-02-05 | 112 |
fig_grouped_by_date = go.Figure(data=[go.Table(
header=dict(values=list(['Data', 'Quantidade']),
font=dict(color='white', size=12), # configurando a cor e tamanho da letra
fill_color='seagreen',
align='left'),
cells=dict(values=[grouped_by_date['horario'],
grouped_by_date['item_count'],],
fill_color='lightcyan',
align='left'))
])
fig_grouped_by_date.update_layout (dict(title={
'text': "Venda de itens por dia - (Fev 2022 - Out 22)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'}), showlegend=True)
py.iplot(fig_grouped_by_date, filename='/home/claudio/results/grafico06 - tabela_vendas_por_dia_v2.html')
vendas = px.line(grouped_by_date, x = 'horario', y = 'item_count', title = 'Itens vendidos por dia - Ano 2022', labels= {'item_count' : 'Quantidade','horario': 'Data'})
# update line color
vendas.update_traces(line_color='seagreen')
# update backgournd
vendas.update_layout(
plot_bgcolor='lightcyan',
xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
# adicionando anaotação
# https://plotly.com/python/styling-plotly-express/
vendas.add_annotation( # add a text callout with arrow
text="Vendas muito acima do padrão, dia 23 de Junho", x="2022-06-23", y=2300, arrowhead=1, showarrow=True
)
vendas.add_annotation( # add a text callout with arrow
text="Não foram informadas vendas <br /> entre 12 de Jul e 29 de Jul", x="2022-07-15", y=150, arrowhead=1, showarrow=True)
py.iplot(vendas, filename='/home/claudio/results/grafico07 - Vendas_por_dia_linha.html')
# ajustando o gráfio para retirar dois outliers (490 em 2022-07-30 , 2303 em 2022-06-23 ), utilizando-se de filtro com a função 'where'
indice_atip = np.where((grouped_by_date['item_count'] < 400))
linha=grouped_by_date.loc[indice_atip]
# https://towardsdatascience.com/line-chart-animation-with-plotly-on-jupyter-e19c738dc882
#linha=grouped_by_date: usar se precisar demonstrar os dois outliers
trace1 = go.Scatter(x=linha['horario'][:2],
y=linha['item_count'][:2],
mode='lines',
line=dict(color='seagreen', width=1.5), # mudamos a cor da linha
fill = 'tozeroy',
fillcolor = 'lightblue'
)
frames = [dict(data= [dict(type='scatter',
x=linha['horario'][:k+1],
y=linha['item_count'][:k+1]),
],
traces= [0],
)for k in range(1, len(linha)-1)]
layout = go.Layout(width=1180,
height=750,
showlegend=False,
hovermode='closest',
updatemenus=[
dict(
type='buttons', showactive=False,
y=1.08,
x=0.12,
xanchor='center',
yanchor='auto',
pad=dict(t=0, r=10),
buttons=[dict(label='Play',
method='animate',
args=[None,
dict(frame=dict(duration=30,
redraw=False),
transition=dict(duration=0),
fromcurrent=True,
mode='immediate')]
)]
),
]
)
layout.update(xaxis =dict(range=['2022-02-01', '2022-10-31'], autorange=False),
yaxis =dict(range=[0, 250], autorange=False), xaxis_title = "Data", yaxis_title = "Quantidade"); # ajustar o range se precisar colocar os outliers
fig_vendas = go.Figure(data=[trace1], frames=frames, layout=layout)
# Adicionamos Título
fig_vendas.update_layout (dict(title={
'text': "Vendas por dia desconsiderando os outliers - (Fev 2022 - Out 22)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'}), showlegend=False, )
# mudamos o background
fig_vendas.update_layout(
plot_bgcolor='lightcyan',
xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
# adicionamos anotação
fig_vendas.add_annotation( # add a text callout with arrow
text="Retriramos os dias 23 de Junho e <br /> 30 de Julho", x="2022-07-15", y=200, arrowhead=1, showarrow=False)
py.iplot(fig_vendas, filename='/home/claudio/results/grafico08 - Vendas_por_dia_farina_interativo.html')
# Vendas atípicas no dia 23 de Junho de 2022 entre 16:00 e 17:00
df_invest = pd.DataFrame(transactions).query("data == 'Jun 23, 2022'").groupby(by='hora')['item_count'].sum().reset_index()
df_invest
| hora | item_count | |
|---|---|---|
| 0 | 7 | 8 |
| 1 | 9 | 5 |
| 2 | 10 | 5 |
| 3 | 11 | 7 |
| 4 | 12 | 4 |
| 5 | 13 | 8 |
| 6 | 14 | 5 |
| 7 | 15 | 3 |
| 8 | 16 | 2254 |
| 9 | 17 | 3 |
| 10 | 18 | 1 |
fig_atipicos = go.Figure(data=[go.Table(
header=dict(values=list({'Hora do dia', 'Quantidade'}),
font=dict(color='white', size=12), # configurando a cor e tamanho da letra
fill_color='seagreen',
align='left'),
cells=dict(values=[df_invest['hora'], df_invest['item_count']],
fill_color='lightcyan',
align='left'))
])
fig_atipicos.update_layout(
title={
'text': "Investigando dados atípicos - dia 23 Junho 2022",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'})
py.iplot(fig_atipicos, filename='/home/claudio/results/grafico09 - investiga_tabela.html')
investiga_dados = px.line(df_invest, x = 'hora', y = 'item_count', title = 'Investigando dados atipicos - <br />Vendas no dia 23 de Junho 2022 por hora do dia',
labels= {'item_count' : 'Quantidade','hora': 'Hora do dia'}, log_y= True)
# update line color
investiga_dados.update_traces(line_color='seagreen')
# update backgournd
investiga_dados.update_layout(
plot_bgcolor='lightcyan',
xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
# adicionando anaotação
# https://plotly.com/python/styling-plotly-express/
investiga_dados.add_annotation( # add a text callout with arrow
text="Vendas entre <br />16 e 17 horas", x=15.5, y=2.9, arrowhead=4, showarrow=True
)
py.iplot(investiga_dados, filename='/home/claudio/results/grafico10 - investiga_graf.html')
f_filter = (transactions['data']!='2022-06-23')&(transactions['data']!='2022-07-30')
transactions[f_filter]
transactions_filtrada = transactions[f_filter]
dds_vendas2= pd.DataFrame(transactions_filtrada[transactions_filtrada.tipo_de_dia == 'DDS'].groupby(by='horario')['item_count'].sum()).reset_index()
fds_vendas2= pd.DataFrame(transactions_filtrada[transactions_filtrada.tipo_de_dia == 'FDS'].groupby(by='horario')['item_count'].sum()).reset_index()
dds_vendas2.head()
| horario | item_count | |
|---|---|---|
| 0 | 2022-02-01 | 74 |
| 1 | 2022-02-02 | 88 |
| 2 | 2022-02-03 | 81 |
| 3 | 2022-02-04 | 110 |
| 4 | 2022-02-07 | 61 |
fds_vendas2.head()
| horario | item_count | |
|---|---|---|
| 0 | 2022-02-05 | 112 |
| 1 | 2022-02-06 | 121 |
| 2 | 2022-02-12 | 108 |
| 3 | 2022-02-13 | 186 |
| 4 | 2022-02-19 | 107 |
print('O menor número de vendas em um dia de semana foi: \n{0}'.format(
dds_vendas2.item_count.min()))
print('O maior número de vendas em um dia de semana foi: \n{0}'.format(
dds_vendas2.item_count.max()))
print('O número médio de vendas em um dia de semana é: \n{0}'.format(
dds_vendas2.item_count.mean()))
O menor número de vendas em um dia de semana foi: 1 O maior número de vendas em um dia de semana foi: 241 O número médio de vendas em um dia de semana é: 75.81005586592178
print('O menor número de vendas em um dia de fim-de-semana foi: \n{0}'.format(
fds_vendas2.item_count.min()))
print('O maior número de vendas em um dia fim-de-semana foi: \n{0}'.format(
fds_vendas2.item_count.max()))
print('O número médio de vendas em um dia fim-de-semana é: \n{0}'.format(
fds_vendas2.item_count.mean()))
O menor número de vendas em um dia de fim-de-semana foi: 65 O maior número de vendas em um dia fim-de-semana foi: 227 O número médio de vendas em um dia fim-de-semana é: 133.73611111111111
single_transaction = pd.DataFrame(transactions_filtrada.groupby(by='venda')['item_count'].sum())
single_transaction = single_transaction.merge(transactions_filtrada[['tipo_de_dia','horario', 'venda']]\
.drop_duplicates('venda'), left_on=single_transaction.index,
right_on='venda', how='left', copy=False)
print('O número médio de itens em cada venda por tipo de dia é: \n{0}'.format(
single_transaction.groupby(by='tipo_de_dia')['item_count'].mean()))
O número médio de itens em cada venda por tipo de dia é: tipo_de_dia DDS 2.650391 FDS 2.836230 Name: item_count, dtype: float64
# Histogram of items sold by type of day
trace_dds = go.Histogram(
x=dds_vendas2.item_count,
opacity=0.75,
name = 'Dia de Semana'
)
trace_fds = go.Histogram(
x=fds_vendas2.item_count,
opacity=0.65,
name = 'Fim-de-semana',
nbinsx = 16
)
data_10A = [trace_dds, trace_fds]
layout_10A = go.Layout( title = 'Distribuição das vendas por dias de semana/ finais de semana',
xaxis=dict(title='Número de Itens Vendidos'),
yaxis=dict(title='Frequencia de vendas'),
barmode='overlay'
)
fig_10A = go.Figure(data=data_10A, layout=layout_10A)
py.iplot(fig_10A, filename='/home/claudio/results/grafico10A - tipo-de_dia_itens_histograma.html')
# Sales trend by date
# https://www.youtube.com/watch?v=Xjm7fOw6gjY
# use reset index for transforming the data in a variable not a index
# Group by(horario), adding one more column (tipo_de_dia)
# https://www.easytweaks.com/pandas-groupby-to-dataframe/
tipo_de_dia_df= pd.DataFrame(transactions_filtrada.groupby(by=['horario','tipo_de_dia'])['item_count'].sum().reset_index())
tipo_de_dia_df.head()
| horario | tipo_de_dia | item_count | |
|---|---|---|---|
| 0 | 2022-02-01 | DDS | 74 |
| 1 | 2022-02-02 | DDS | 88 |
| 2 | 2022-02-03 | DDS | 81 |
| 3 | 2022-02-04 | DDS | 110 |
| 4 | 2022-02-05 | FDS | 112 |
# Line chart of items sold by type of day
# Create traces
tipo_dia_graf = go.Figure()
tipo_dia_graf.add_trace(go.Scatter(x=fds_vendas2['horario'], y=fds_vendas2['item_count'],
mode='lines',
name='Fim de Semana',
line=dict(color='darkblue', width=1.5)))
tipo_dia_graf.add_trace(go.Scatter(x=dds_vendas2['horario'], y=dds_vendas2['item_count'],
mode='lines',
name='Dia de Semana',
line=dict(color='seagreen', width=1.5))) # mudamos a cor da linha
tipo_dia_graf.update_layout ( title = {'text' : 'Venda de itens <br />Dias de semana X Finais de semana - 2022 (Fev-Out)'},
xaxis=dict(title='Data', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis=dict(title='Itens vendidos', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
plot_bgcolor='lightcyan', # update Background
)
py.iplot(tipo_dia_graf, filename='/home/claudio/results/grafico10B - tipo-de_dia_2_linhas.html')
dds_fatu= pd.DataFrame(transactions_filtrada[transactions_filtrada.tipo_de_dia == 'DDS'].groupby(by='horario')['total_item'].sum()).reset_index()
fds_fatu= pd.DataFrame(transactions_filtrada[transactions_filtrada.tipo_de_dia == 'FDS'].groupby(by='horario')['total_item'].sum()).reset_index()
dds_fatu.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 179 entries, 0 to 178 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 horario 179 non-null object 1 total_item 179 non-null float64 dtypes: float64(1), object(1) memory usage: 2.9+ KB
# Histograma
trace_dds = go.Histogram(
x=dds_fatu[dds_fatu.total_item < 7900].total_item,
opacity=0.75,
name = 'Dia de Semana'
)
trace_fds = go.Histogram(
x=fds_fatu[fds_fatu.total_item < 7900].total_item,
opacity=0.65,
name = 'Fim-de-semana',
nbinsx = 16
)
data_11 = [trace_dds, trace_fds]
layout_11 = go.Layout( title = 'Distribuição do faturamento por dias de semana/ finais de semana',
xaxis=dict(title='Faturamento por dia'),
yaxis=dict(title='Frequencia de faturamento diário'),
barmode='overlay'
)
fig_11 = go.Figure(data=data_11, layout=layout_11)
py.iplot(fig_11, filename='/home/claudio/results/grafico11 - tipo-de_dia_faturamento_histograma.html')
# Line chart of items sold by type of day
# Create traces
tipo_dia_fatu_graf = go.Figure()
tipo_dia_fatu_graf.add_trace(go.Scatter(x=fds_fatu['horario'], y=fds_fatu['total_item'],
mode='lines',
name='Fim de Semana',
line=dict(color='darkblue', width=1.5)))
tipo_dia_fatu_graf.add_trace(go.Scatter(x=dds_fatu['horario'], y=dds_fatu['total_item'],
mode='lines',
name='Dia de Semana',
line=dict(color='seagreen', width=1.5))) # mudamos a cor da linha
tipo_dia_fatu_graf.update_layout ( title = {'text' : 'Faturamento <br />Dias de semana X Finais de semana - 2022 (Fev-Out)'},
xaxis=dict(title='Data', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis=dict(title='Faturamento', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
plot_bgcolor='lightcyan', # update Background
)
py.iplot(tipo_dia_fatu_graf, filename='/home/claudio/results/grafico11B - faturamento_tipo_de_dia_2_linhas.html')
grouped_by_date_filtrada = pd.DataFrame(transactions_filtrada.groupby(by='horario')['item_count'].sum()).reset_index()
grouped_by_date_filtrada.head()
| horario | item_count | |
|---|---|---|
| 0 | 2022-02-01 | 74 |
| 1 | 2022-02-02 | 88 |
| 2 | 2022-02-03 | 81 |
| 3 | 2022-02-04 | 110 |
| 4 | 2022-02-05 | 112 |
trace_todos_itens = go.Figure(data=[go.Box(y= grouped_by_date_filtrada['item_count'], boxpoints= "outliers", jitter = 0.5, whiskerwidth= 1, notchwidth=0.5, fillcolor= 'seagreen', line=dict(color="seagreen" )
)])
## adicionei uma anotação ao boxplot - https://community.plotly.com/t/how-to-save-labels-on-plotly-box-plot-instead-of-it-disappearing-when-not-hovering-on-it/54637
trace_todos_itens.add_annotation(x=0, y=250, #Min
text="Dias com as vendas acima do normal - Outliers",
font=dict(size=16),
showarrow=True, arrowwidth= 2, arrowhead= 4
)
trace_todos_itens.update_layout(title= {'text': "Distribuição dos dias por quantidade de itens vendidos - 2022 (Fev-Out)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'},
xaxis=dict(title='Hora do dia', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis=dict(title='', mirror=True, showline=True, linecolor='black', gridcolor='lightblue'),
plot_bgcolor='lightcyan') # update Background
##### adicionar comentário ao boxplot
box_itens_por_dia= go.Figure(data = trace_todos_itens, layout = layout)
py.iplot(box_itens_por_dia, filename = '/home/claudio/results/grafico12 - Boxplot_Vendas_itens_ranking.html')
# Find the number of itens sold per day of week
agregado_vendas_por_dia_2 = transactions_filtrada.groupby(by=['dia_da_semana','horario'])['item_count'].sum()
agregado_vendas_por_dia_2 = pd.DataFrame(agregado_vendas_por_dia_2).reset_index()
agregado_vendas_por_dia_2.head(10)
| dia_da_semana | horario | item_count | |
|---|---|---|---|
| 0 | domingo | 2022-02-06 | 121 |
| 1 | domingo | 2022-02-13 | 186 |
| 2 | domingo | 2022-02-20 | 156 |
| 3 | domingo | 2022-02-27 | 86 |
| 4 | domingo | 2022-03-06 | 126 |
| 5 | domingo | 2022-03-13 | 145 |
| 6 | domingo | 2022-03-20 | 140 |
| 7 | domingo | 2022-03-27 | 121 |
| 8 | domingo | 2022-04-03 | 93 |
| 9 | domingo | 2022-04-10 | 77 |
agregado_vendas_por_dia_2 = agregado_vendas_por_dia_2[~agregado_vendas_por_dia_2.index.duplicated(keep='first')]
dias = ['segunda','terça','quarta','quinta','sexta','sábado','domingo']
vendas_por_dia = []
for dia in dias:
df_dias= agregado_vendas_por_dia_2[agregado_vendas_por_dia_2.dia_da_semana == dia]
vendas_por_dia.append(list(df_dias['item_count']))
vendas_por_dia_item_graf = px.box(agregado_vendas_por_dia_2, y = 'item_count', x = 'dia_da_semana', color='dia_da_semana', labels={'item_count': 'Número de Itens Vendidos','dia_da_semana':'Dia da Semana'}, hover_data = [agregado_vendas_por_dia_2.item_count], points="all",
category_orders= {"dia_da_semana": ['sábado','domingo','segunda','terça','quarta','quinta','sexta']})
vendas_por_dia_item_graf.update_layout(title= {'text': "Distribuição das vendas por itens e dia da semana - 2022 (Fev-Out)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'})
py.plot(vendas_por_dia_item_graf, filename='/home/claudio/results/grafico13 - Vendas_agregadas_itens_dias_semana_PX.html')
'/home/claudio/results/grafico13 - Vendas_agregadas_itens_dias_semana_PX.html'
expr: syntax error: missing argument after ‘8’
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001 /usr/share/atom/atom: bad option: --pid=1670
traces = []
for i, j in enumerate(dias):
trace = go.Box(
y=vendas_por_dia[i],
name = j
)
traces.append(trace)
data_13 = traces
layout_13 = go.Layout(
title={
'text': "Distribuição das vendas por dia da semana - 2022 (Fev-Out)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'},
xaxis=dict(title='Dia da Semana', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis=dict(title='Número de Itens Vendidos', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
plot_bgcolor='lightcyan', # update Background
showlegend= False)
fig_13 = go.Figure(data = data_13, layout = layout_13)
py.iplot(fig_13, filename='/home/claudio/results/grafico13 - Vendas_por_dia_da_semana_BoxplotV2.html')
dia_hora_df = transactions_filtrada[[ 'hora_minuto','dia_da_semana', 'hora', 'item_count']]
vendas_por_dia_hora_graf = px.box(dia_hora_df, y = 'hora', x = 'dia_da_semana', color='dia_da_semana', labels={'hora': 'Hora','dia_da_semana':'Dia da Semana'}, hover_data = [dia_hora_df.hora_minuto], points="all",
category_orders= {"dia_da_semana": ['sábado','domingo','segunda','terça','quarta','quinta','sexta']})
vendas_por_dia_hora_graf.update_layout(title= {'text': "Distribuição das vendas por hora e dia da semana - 2022 (Fev-Out)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'})
py.iplot(vendas_por_dia_hora_graf, filename='/home/claudio/results/grafico14 - Vendas_agregadas_Hora_dias_semana.html')
# https://stackoverflow.com/questions/59815797/how-to-save-plotly-express-plot-into-a-html-or-static-image-file
# fig.write_html(r"C:\Users\ctobr\OneDrive\DATA_Science\1_farina_in_natura\farina_in_natura00\Vendas_por_dia_Boxplot_simples2.html")
trace_todos = go.Figure(data=[go.Box(x= dia_hora_df['hora'], boxpoints= "outliers", jitter = 0.5, whiskerwidth= 1, notchwidth=0.5, name= "Diariamente", fillcolor= 'seagreen', line=dict(color="seagreen" )
)])
## adicionei uma anotação ao boxplot - https://community.plotly.com/t/how-to-save-labels-on-plotly-box-plot-instead-of-it-disappearing-when-not-hovering-on-it/54637
trace_todos.add_annotation(x=8, y=0.05, #Min
text="25% das vendas do dia",
font=dict(size=16),
showarrow=True, arrowwidth= 2, arrowhead= 4
)
trace_todos.add_annotation(x=14, y=0.25, #Min
text="25% das vendas do dia ",
font=dict(size=16),
showarrow=True, arrowwidth= 2, arrowhead= 4
)
trace_todos.add_annotation(x=17.25, y=0.25, #Min
text="25% das vendas do dia ",
font=dict(size=16),
showarrow=True, arrowwidth= 2, arrowhead= 4
)
trace_todos.add_annotation(x=20, y=0.05, #Min
text="25% das vendas do dia ",
font=dict(size=16),
showarrow=True, arrowwidth= 2, arrowhead= 4
)
trace_todos.update_layout(title= {'text': "Distribuição das vendas por hora do dia - 2022 (Fev-Out)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'},
xaxis=dict(title='Hora do dia', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
yaxis=dict(title='', mirror=True, showline=True, linecolor='black', gridcolor='lightcyan'),
plot_bgcolor='lightcyan') # update Background
##### adicionar comentário ao boxplot
todos_dias_graf = go.Figure(data = trace_todos, layout = layout)
py.iplot(todos_dias_graf, filename = '/home/claudio/results/grafico15 - Boxplot_Vendas_agregadas_hora_GO.html')
# https://www.youtube.com/watch?v=AwtQ7vJg0-o
dom = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'domingo']
seg = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'segunda']
ter = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'terça']
qua = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'quarta']
qui = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'quinta']
sex = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'sexta']
sab = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'sábado']
trace0 = go.Box(
y= dom, boxpoints= "outliers", jitter = 0.5,
name = 'Domingo', whiskerwidth= 1, notchwidth=0.5
)
trace1 = go.Box(
y= seg, boxpoints = 'outliers', jitter = 0.5,
name = 'Segunda', whiskerwidth= 1, notchwidth=0.5
)
trace2 = go.Box(
y= ter, boxpoints = 'outliers', jitter = 0.5,
name = 'Terça', whiskerwidth= 1, notchwidth=0.5
)
trace3 = go.Box(
y= qua, boxpoints = 'outliers', jitter = 0.5,
name = 'Quarta', whiskerwidth= 1, notchwidth=0.5
)
trace4 = go.Box(
y= qui,boxpoints = 'outliers', jitter = 0.5,
name = 'Quinta', whiskerwidth= 1, notchwidth=0.5
)
trace5 = go.Box(
y= sex,boxpoints = 'outliers', jitter = 0.5,
name = 'Sexta', whiskerwidth= 1, notchwidth=0.5
)
trace6 = go.Box(
y= sab,boxpoints = 'outliers', jitter = 0.5,
name = 'Sábado', whiskerwidth= 1, notchwidth=0.5
)
data_16 = [trace6, trace0, trace1, trace2, trace3, trace4, trace5]
layout_16 = go.Layout(title={'text': "Distribuição das vendas por dia da semana e hora do dia - 2022 (Fev-Out)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'},
xaxis=dict(title='Dia da Semana', mirror=True, showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis=dict(title='Hora do dia', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
plot_bgcolor='lightcyan', showlegend= False,
) # update Background
fig_16 = go.Figure(data = data_16, layout = layout_16)
py.iplot(fig_16, filename='/home/claudio/results/grafico16 - Vendas_hora_por_dia_Boxplot_GO.html')
# Criar o percentual por hora de vendas com filtro por dia da semana
vendas_por_hora_perc_all = transactions_filtrada.hora.value_counts(True)
vendas_por_hora_perc_dom = transactions_filtrada[transactions_filtrada.dia_da_semana=="domingo"].hora.value_counts(True)
vendas_por_hora_perc_seg = transactions_filtrada[transactions_filtrada.dia_da_semana=="segunda"].hora.value_counts(True)
vendas_por_hora_perc_ter = transactions_filtrada[transactions_filtrada.dia_da_semana=="terça"].hora.value_counts(True)
vendas_por_hora_perc_qua = transactions_filtrada[transactions_filtrada.dia_da_semana=="quarta"].hora.value_counts(True)
vendas_por_hora_perc_qui = transactions_filtrada[transactions_filtrada.dia_da_semana=="quinta"].hora.value_counts(True)
vendas_por_hora_perc_sex = transactions_filtrada[transactions_filtrada.dia_da_semana=="sexta"].hora.value_counts(True)
vendas_por_hora_perc_sab = transactions_filtrada[transactions_filtrada.dia_da_semana=="sábado"].hora.value_counts(True)
# Pandas: How to Represent value_counts as Percentage
# https://www.statology.org/pandas-value_counts-percentage/
vendas_por_hora_perc_all = transactions_filtrada.hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_dom = transactions_filtrada[transactions_filtrada.dia_da_semana=="domingo"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_seg = transactions_filtrada[transactions_filtrada.dia_da_semana=="segunda"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_ter = transactions_filtrada[transactions_filtrada.dia_da_semana=="terça"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_qua = transactions_filtrada[transactions_filtrada.dia_da_semana=="quarta"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_qui = transactions_filtrada[transactions_filtrada.dia_da_semana=="quinta"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_sex = transactions_filtrada[transactions_filtrada.dia_da_semana=="sexta"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_sab = transactions_filtrada[transactions_filtrada.dia_da_semana=="sábado"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
# Pandas: How to Represent value_counts as Percentage
# https://www.statology.org/pandas-value_counts-percentage/
vendas_por_hora_perc_all = transactions_filtrada.hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_dom = transactions_filtrada[transactions_filtrada.dia_da_semana=="domingo"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_seg = transactions_filtrada[transactions_filtrada.dia_da_semana=="segunda"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_ter = transactions_filtrada[transactions_filtrada.dia_da_semana=="terça"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_qua = transactions_filtrada[transactions_filtrada.dia_da_semana=="quarta"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_qui = transactions_filtrada[transactions_filtrada.dia_da_semana=="quinta"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_sex = transactions_filtrada[transactions_filtrada.dia_da_semana=="sexta"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_sab = transactions_filtrada[transactions_filtrada.dia_da_semana=="sábado"].hora.value_counts(normalize=True).mul(100).round(2)
# Plotar o percentual em grafico de barras
button_layer_1_height = 1.12
button_layer_2_height = 1.065
colors2 = ['seagreen',]
trace_semana= go.Bar(
x=vendas_por_hora_perc_all.index,
y=vendas_por_hora_perc_all.values,
text= vendas_por_hora_perc_all,
visible=True,
marker_color=colors2,
name='Semana'
)
trace_dom = go.Bar(
x=vendas_por_hora_perc_dom.index,
y=vendas_por_hora_perc_dom.values,
text= vendas_por_hora_perc_dom,
visible=False,
marker_color=colors2,
name='Domingo'
)
trace_seg = go.Bar(
x=vendas_por_hora_perc_seg.index,
y=vendas_por_hora_perc_seg.values,
text= vendas_por_hora_perc_seg,
visible=False,
marker_color=colors2,
name='Segunda'
)
trace_ter = go.Bar(
x=vendas_por_hora_perc_ter.index,
y=vendas_por_hora_perc_ter.values,
text = vendas_por_hora_perc_ter,
visible=False,
marker_color=colors2,
name='Terça'
)
trace_qua = go.Bar(
x=vendas_por_hora_perc_qua.index,
y=vendas_por_hora_perc_qua.values,
text = vendas_por_hora_perc_qua,
visible=False,
marker_color=colors2,
name='Quarta'
)
trace_qui = go.Bar(
x=vendas_por_hora_perc_qui.index,
y=vendas_por_hora_perc_qui.values,
text = vendas_por_hora_perc_qui,
visible=False,
marker_color=colors2,
name='Quinta'
)
trace_sex = go.Bar(
x=vendas_por_hora_perc_sex.index,
y=vendas_por_hora_perc_sex.values,
text = vendas_por_hora_perc_sex,
visible=False,
marker_color=colors2,
name='Sexta'
)
trace_sab = go.Bar(
x=vendas_por_hora_perc_sab.index,
y=vendas_por_hora_perc_sab.values,
text = vendas_por_hora_perc_sab,
visible=False,
marker_color=colors2,
name='Sábado'
)
data_vend_hor_dia = [trace_semana, trace_dom, trace_seg, trace_ter, trace_qua, trace_qui, trace_sex, trace_sab]
updatemenus_ven_hor_dia = list([
dict
(
buttons=list([
dict(label = 'Semana',
method = 'update',
args = [{'visible': [True, False, False, False, False, False, False, False]},
{'title': 'Percentual das vendas por hora - 2022<br><sup>Semana</sup>'}]),
dict(label = 'Domingo',
method = 'update',
args = [{'visible': [False, True, False, False, False, False, False, False]},
{'title': 'Percentual das vendas por hora - 2022<br><sup>Domingo</sup>'}]),
dict(label = 'Segunda',
method = 'update',
args = [{'visible': [False, False, True, False, False, False, False, False]},
{'title': 'Percentual das vendas por hora - 2022<br><sup>Segunda</sup>'}]),
dict(label = 'Terça',
method = 'update',
args = [{'visible': [False, False, False, True, False, False, False, False]},
{'title': 'Percentual das vendas por hora - 2022<br><sup>Terça</sup>'}]),
dict(label = 'Quarta',
method = 'update',
args = [{'visible': [False, False, False, False, True, False, False, False]},
{'title': 'Percentual das vendas por hora - 2022<br><sup>Quarta</sup>'}]),
dict(label = 'Quinta',
method = 'update',
args = [{'visible': [False, False, False, False, False, True, False, False]},
{'title': 'Percentual das vendas por hora - 2022<br><sup>Quinta</sup>'}]),
dict(label = 'Sexta',
method = 'update',
args = [{'visible': [False, False, False, False, False, False, True, False]},
{'title': 'Percentual das vendas por hora - 2022<br><sup>Sexta</sup>'}]),
dict(label = 'Sábado',
method = 'update',
args = [{'visible': [False, False, False, False, False, False, False, True]},
{'title': 'Percentual das vendas por hora - 2022<br><sup>Sábado</sup>'}])
]),
type='buttons',
direction = 'right',
pad = {'r': 10, 't': 10},
showactive = True,
x = 0.3,
xanchor = 'left',
y = button_layer_1_height,
yanchor = 'top'
)
])
layout_ven_hor_dia = dict(title='Percentual das vendas por hora - 2022 <br><sup>Semana</sup>', showlegend=False,
updatemenus= updatemenus_ven_hor_dia)
# to change the background color
layout_ven_hor_dia.update(
plot_bgcolor='lightcyan',
xaxis =dict(title = "Hora do dia", mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis =dict(title = "Percentual (%)", mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
fig_16B = dict(data=data_vend_hor_dia, layout=layout_ven_hor_dia)
py.plot(fig_16B, filename='/home/claudio/results/grafico016B - percentual_vendas_hora_dia.html')
'/home/claudio/results/grafico016B - percentual_vendas_hora_dia.html'
expr: syntax error: missing argument after ‘8’
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001 /usr/share/atom/atom: bad option: --pid=1885
group_by_data_e_hora_do_dia = transactions_filtrada.groupby(by=['dia_da_semana','hora_do_dia'])['item_count'].sum()
# https://stackoverflow.com/questions/39275294/sort-by-certain-order-situation-pandas-dataframe-groupby
# #### Alterando a ordem de apresentação dos dias da semana(Seg - Dom), usando a lista 'dias' anteriormente criada
group_by_data_e_hora_do_dia = group_by_data_e_hora_do_dia.reindex(dias, level= 'dia_da_semana')
group_by_data_e_hora_do_dia = pd.DataFrame(group_by_data_e_hora_do_dia).reset_index()
group_by_data_e_hora_do_dia
| dia_da_semana | hora_do_dia | item_count | |
|---|---|---|---|
| 0 | segunda | manhã | 587 |
| 1 | segunda | noite | 1354 |
| 2 | segunda | tarde | 548 |
| 3 | terça | manhã | 545 |
| 4 | terça | noite | 1359 |
| 5 | terça | tarde | 619 |
| 6 | quarta | manhã | 642 |
| 7 | quarta | noite | 1403 |
| 8 | quarta | tarde | 811 |
| 9 | quinta | manhã | 526 |
| 10 | quinta | noite | 1459 |
| 11 | quinta | tarde | 729 |
| 12 | sexta | manhã | 744 |
| 13 | sexta | noite | 1575 |
| 14 | sexta | tarde | 669 |
| 15 | sábado | manhã | 1512 |
| 16 | sábado | noite | 2096 |
| 17 | sábado | tarde | 986 |
| 18 | domingo | manhã | 2310 |
| 19 | domingo | noite | 1793 |
| 20 | domingo | tarde | 932 |
group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'manhã'].item_count
0 587 3 545 6 642 9 526 12 744 15 1512 18 2310 Name: item_count, dtype: int64
trace_manhã = go.Bar(
y = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'manhã'].item_count,
x = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'manhã'].dia_da_semana,
name = "manhã",
opacity=0.6,
)
trace_tarde = go.Bar(
y = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'tarde'].item_count,
x = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'tarde'].dia_da_semana,
name = "tarde",
opacity=0.6
)
trace_noite = go.Bar(
y = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'noite'].item_count,
x = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'noite'].dia_da_semana,
name = "noite",
opacity=0.6
)
layout = go.Layout(title={'text': 'Vendas por período e dia da semana - 2022 (Fev - Out) ','y':0.95, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top' },
xaxis = dict(title = 'Dia da Semana', showline=True, linecolor='black', gridcolor='lightcyan'),
yaxis = dict(title = 'Número de Itens Vendidos',mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
plot_bgcolor='lightcyan',
hovermode= 'x')
data = [trace_manhã, trace_tarde, trace_noite]
fig = go.Figure(data = data, layout = layout)
py.iplot(fig, filename='/home/claudio/results/grafico17 - vendas-por-dia-periodo.html')
vendas_por_hora = pd.DataFrame(
transactions_filtrada[(transactions_filtrada.hora > 1) &
(transactions_filtrada.hora < 23)].groupby(by='hora')['item_count'].sum())
vendas_por_hora
| item_count | |
|---|---|
| hora | |
| 6 | 41 |
| 7 | 524 |
| 8 | 1289 |
| 9 | 1749 |
| 10 | 1817 |
| 11 | 1446 |
| 12 | 737 |
| 13 | 627 |
| 14 | 769 |
| 15 | 1249 |
| 16 | 1912 |
| 17 | 3279 |
| 18 | 2917 |
| 19 | 2416 |
| 20 | 2094 |
| 21 | 333 |
trace = go.Scatter(x=list(vendas_por_hora.index),
y=list(vendas_por_hora.item_count),
fill = 'tozeroy',
fillcolor = 'lightblue')
data = [trace]
layout = dict(
title= {'text' : 'Itens vendidos por hora - 2022 (Fev - Out)', 'y':0.95, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top'},
yaxis = dict( title = 'Volume de Itens Vendidos', gridcolor='lightblue'),
xaxis=dict( title = 'Hora',
rangeselector=dict(
buttons=list([
dict(count = 12,
label='Manhã',
step='hour',
stepmode='backward'),
dict(count = 8,
label='Tarde',
step='hour',
stepmode='backward'),
dict(count = 4,
label='Noite',
step='hour',
stepmode='backward'),
dict(step='all')
]),
visible = True
),
rangeslider=dict(
visible = True
),
gridcolor='lightcyan'),
plot_bgcolor='lightcyan'
)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename = '/home/claudio/results/grafico18 - vendas-por-hora.html')
# Create a list of lists that is conducive to the format expected from apyori
list_of_lists = []
for transaction in list(set(transactions_filtrada.venda)):
df = transactions_filtrada[transactions_filtrada.venda == transaction]
values = df.produto.values
list_of_lists.append(list(values))
# imprting the 'apriori' function from the 'apyori' module
from apyori import apriori
# Create a list of association rules
association_rules = apriori(list_of_lists, min_confidence = 0.2, min_support = 0.005,min_lift = 2, min_length=2)
association_results = list(association_rules)
# print(association_results)
for item in association_results:
# first index of the inner list
# Contains base item and add item
pair = item[0]
items = [x for x in pair]
print("Rule: " + items[0] + " -> " + items[1])
#second index of the inner list
print("Support: " + str(item[1]))
#third index of the list located at 0th
#of the third index of the inner list
print("Confidence: " + str(item[2][0][2]))
print("Lift: " + str(item[2][0][3]))
print("=====================================")
Rule: ADICIONAL DE MUSSARELA -> ADICIONAL DE PRESUNTO Support: 0.0061068702290076335 Confidence: 0.5416666666666666 Lift: 70.95833333333333 ===================================== Rule: CROISSANT -> ADICIONAL DE MUSSARELA Support: 0.009395184967704051 Confidence: 0.8333333333333333 Lift: 4.038607474862455 ===================================== Rule: CROISSANT -> ADICIONAL DE PRESUNTO Support: 0.006928948913681738 Confidence: 0.9076923076923077 Lift: 4.398975526465566 ===================================== Rule: CROISSANT -> AGUA MINERAL S/GAS 500ML Support: 0.01996476805637111 Confidence: 0.4197530864197531 Lift: 2.0342615428936814 ===================================== Rule: PAO SOURDOUGH OVOS -> BOWL DE IOGURTE FRUTAS AVEIA Support: 0.007868467410452143 Confidence: 0.24814814814814815 Lift: 9.107678799489145 ===================================== Rule: CROISSANT -> CAFE CATUAI VERMELHO C/LEITE G Support: 0.007868467410452143 Confidence: 0.43506493506493504 Lift: 2.1084677985645546 ===================================== Rule: CROISSANT -> CAFE CATUAI VERMELHO G 200ML Support: 0.013740458015267175 Confidence: 0.46799999999999997 Lift: 2.2680819578827545 ===================================== Rule: CROISSANT -> CAFE COLMEIA C/ LEITE G 200ML Support: 0.00551967116852613 Confidence: 0.5280898876404494 Lift: 2.5592973211487915 ===================================== Rule: CROISSANT -> CAFE COLMEIA G 200ML Support: 0.007633587786259542 Confidence: 0.4276315789473684 Lift: 2.0724433094688917 ===================================== Rule: CROISSANT -> LARANJA 400 ML Support: 0.021374045801526718 Confidence: 0.6275862068965518 Lift: 3.041489215551587 ===================================== Rule: CROISSANT BRIE PARMA MEL DE -> SUCO DE LARANJA 400ML Support: 0.005989430416911333 Confidence: 0.2451923076923077 Lift: 5.394864341085272 ===================================== Rule: CROISSANT IFOOD -> PASTEL DE NATA 60G IFOOD Support: 0.005167351732237228 Confidence: 0.4356435643564356 Lift: 9.391151773405188 ===================================== Rule: MINI CROISSANT PAIN DE CHOC -> MINI CROISSANT 25G KIT C 5 Support: 0.00540223135642983 Confidence: 0.2358974358974359 Lift: 5.046901172529314 ===================================== Rule: CROISSANT -> ADICIONAL DE MUSSARELA Support: 0.005637110980622431 Confidence: 0.5 Lift: 72.16101694915254 =====================================
rule_list = []
for item in association_results:
# first index of the inner list
# Contains base item and add item
pair = item[0]
items = [x for x in pair]
rule_list.append ( items[0] + " -> " + items[1])
support_list =[]
for item in association_results:
# first index of the inner list
# Contains base item and add item
pair = item[0]
#items = [x for x in pair]
#second index of the inner list
support_list.append( str(item[1]))
confidence_list =[]
for item in association_results:
# first index of the inner list
# Contains base item and add item
pair = item[0]
items = [x for x in pair]
#second index of the inner list
confidence_list.append(str(item[2][0][2]))
lift_list =[]
for item in association_results:
# first index of the inner list
# Contains base item and add item
pair = item[0]
items = [x for x in pair]
#second index of the inner list
lift_list.append(str(item[2][0][3]))
association_results_df=pd.DataFrame(list
(zip(rule_list, support_list, confidence_list, lift_list)), columns=['Rule','Support', 'Confidence', 'Lift'])
association_results_df
| Rule | Support | Confidence | Lift | |
|---|---|---|---|---|
| 0 | ADICIONAL DE MUSSARELA -> ADICIONAL DE PRESUNTO | 0.0061068702290076335 | 0.5416666666666666 | 70.95833333333333 |
| 1 | CROISSANT -> ADICIONAL DE MUSSARELA | 0.009395184967704051 | 0.8333333333333333 | 4.038607474862455 |
| 2 | CROISSANT -> ADICIONAL DE PRESUNTO | 0.006928948913681738 | 0.9076923076923077 | 4.398975526465566 |
| 3 | CROISSANT -> AGUA MINERAL S/GAS 500ML | 0.01996476805637111 | 0.4197530864197531 | 2.0342615428936814 |
| 4 | PAO SOURDOUGH OVOS -> BOWL DE IOGURTE FRUTAS A... | 0.007868467410452143 | 0.24814814814814815 | 9.107678799489145 |
| 5 | CROISSANT -> CAFE CATUAI VERMELHO C/LEITE G | 0.007868467410452143 | 0.43506493506493504 | 2.1084677985645546 |
| 6 | CROISSANT -> CAFE CATUAI VERMELHO G 200ML | 0.013740458015267175 | 0.46799999999999997 | 2.2680819578827545 |
| 7 | CROISSANT -> CAFE COLMEIA C/ LEITE G 200ML | 0.00551967116852613 | 0.5280898876404494 | 2.5592973211487915 |
| 8 | CROISSANT -> CAFE COLMEIA G 200ML | 0.007633587786259542 | 0.4276315789473684 | 2.0724433094688917 |
| 9 | CROISSANT -> LARANJA 400 ML | 0.021374045801526718 | 0.6275862068965518 | 3.041489215551587 |
| 10 | CROISSANT BRIE PARMA MEL DE -> SUCO DE LARANJA... | 0.005989430416911333 | 0.2451923076923077 | 5.394864341085272 |
| 11 | CROISSANT IFOOD -> PASTEL DE NATA 60G IFOOD | 0.005167351732237228 | 0.4356435643564356 | 9.391151773405188 |
| 12 | MINI CROISSANT PAIN DE CHOC -> MINI CROISSANT ... | 0.00540223135642983 | 0.2358974358974359 | 5.046901172529314 |
| 13 | CROISSANT -> ADICIONAL DE MUSSARELA | 0.005637110980622431 | 0.5 | 72.16101694915254 |
fig_apriori_m1= go.Figure(data=[go.Table(
header=dict(values=list(['Regra', 'Suporte','Confiança','Incremento'] ),
font=dict(color='white', size=12), # configurando a cor e tamanho da letra
fill_color='seagreen',
align='left'),
cells=dict(values=[association_results_df['Rule'], association_results_df['Support'], association_results_df['Confidence'], association_results_df['Lift']],
fill_color='lightcyan',
align='left'))
])
fig_apriori_m1.update_layout(
title={
'text': "Aplicando o algoritmo Apriori - Modelo 01 - 2022 (Fev -Out)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'})
py.plot(fig_apriori_m1, filename='/home/claudio/results/grafico19 - Aplicando_Apriori_Modelo_01.html')
'/home/claudio/results/grafico19 - Aplicando_Apriori_Modelo_01.html'
expr: syntax error: missing argument after ‘8’
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001 /usr/share/atom/atom: bad option: --pid=2323
manha_filter = (transactions_filtrada['hora_do_dia']=='manhã')
transactions_filtrada_manha = transactions_filtrada[manha_filter]
# Create a list of lists that is conducive to the format expected from apyori
list_of_lists_manha = []
for transaction in list(set(transactions_filtrada_manha.venda)):
df = transactions_filtrada_manha[transactions_filtrada_manha.venda == transaction]
values = df.produto.values
list_of_lists_manha.append(list(values))
expr: syntax error: missing argument after ‘8’
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001 /usr/share/atom/atom: bad option: --pid=524
# Create a list of association rules
association_rules_manha = apriori(list_of_lists_manha, min_confidence = 0.2, min_support = 0.005,min_lift = 3, min_length=3)
association_results_manha = list(association_rules_manha)
for item in association_results_manha:
# first index of the inner list
# Contains base item and add item
pair = item[0]
items = [x for x in pair]
print("Rule: " + items[0] + " -> " + items[1])
#second index of the inner list
print("Support: " + str(item[1]))
#third index of the list located at 0th
#of the third index of the inner list
print("Confidence: " + str(item[2][0][2]))
print("Lift: " + str(item[2][0][3]))
print("=====================================")
Rule: ADICIONAL DE MUSSARELA -> ADICIONAL DE PRESUNTO Support: 0.009764758100310697 Confidence: 0.5945945945945946 Lift: 40.5945945945946 ===================================== Rule: ADICIONAL DE MUSSARELA -> AGUA MINERAL S/GAS 500ML Support: 0.005326231691078562 Confidence: 0.3243243243243244 Lift: 5.294947121034079 ===================================== Rule: CROISSANT -> ADICIONAL DE MUSSARELA Support: 0.015090989791389259 Confidence: 0.918918918918919 Lift: 3.344627341396324 ===================================== Rule: CROISSANT -> ADICIONAL DE PRESUNTO Support: 0.013315579227696404 Confidence: 0.9090909090909091 Lift: 3.3088559259803203 ===================================== Rule: CROISSANT BRIE PARMA MEL DE -> AGUA MINERAL SEM GAS 500 ML Support: 0.007545494895694629 Confidence: 0.2 Lift: 5.006666666666667 ===================================== Rule: PAO SOURDOUGH OVOS -> BOWL DE IOGURTE FRUTAS AVEIA Support: 0.024411895250776743 Confidence: 0.2972972972972973 Lift: 3.9170222854433385 ===================================== Rule: SELECAO DE PAES GELEIA -> BOWL DE IOGURTE FRUTAS AVEIA Support: 0.010208610741233911 Confidence: 0.2674418604651163 Lift: 3.2570081709616594 ===================================== Rule: CAFE CATUAI VERMELHO C/LEITE G -> LARANJA 400 ML Support: 0.007989347536617843 Confidence: 0.23684210526315788 Lift: 3.629967776584318 ===================================== Rule: CROISSANT IFOOD -> COOKIES 80G IFOOD Support: 0.005770084332001775 Confidence: 0.619047619047619 Lift: 7.621389539422324 ===================================== Rule: CROISSANT BRIE PARMA MEL DE -> CROISSANT OVOS BACON MEL Support: 0.007989347536617843 Confidence: 0.2 Lift: 4.506 ===================================== Rule: CROISSANT BRIE PARMA MEL DE -> SUCO DE LARANJA 400ML Support: 0.00887705281846427 Confidence: 0.22222222222222224 Lift: 6.032128514056224 ===================================== Rule: CROISSANT IFOOD -> PASTEL DE NATA 60G IFOOD Support: 0.008433200177541057 Confidence: 0.5428571428571428 Lift: 6.683372365339578 ===================================== Rule: CROISSANT IFOOD -> SOURDOUGH 369G IFOOD Support: 0.008433200177541057 Confidence: 0.59375 Lift: 7.309938524590163 ===================================== Rule: CROISSANT MUSSATRELA DE -> SUCO DE LARANJA 400ML Support: 0.006657789613848202 Confidence: 0.2459016393442623 Lift: 6.674896306537625 ===================================== Rule: SUCO DE LARANJA 400ML -> CROISSANT OVOS BACON MEL Support: 0.008433200177541057 Confidence: 0.2289156626506024 Lift: 5.157469879518072 ===================================== Rule: MINI CROISSANT PAIN DE CHOC -> MINI CROISSANT 25G KIT C 5 Support: 0.007101642254771416 Confidence: 0.2857142857142857 Lift: 5.646616541353383 ===================================== Rule: CROISSANT -> ADICIONAL DE MUSSARELA Support: 0.009320905459387484 Confidence: 0.5675675675675677 Lift: 42.624324324324334 ===================================== Rule: CROISSANT -> CAFE CATUAI VERMELHO G 200ML Support: 0.006657789613848202 Confidence: 0.2054794520547945 Lift: 3.354675402025015 ===================================== Rule: CROISSANT -> LARANJA 400 ML Support: 0.006213936972924989 Confidence: 0.29166666666666663 Lift: 4.470238095238095 ===================================== Rule: CROISSANT -> CAFE CATUAI VERMELHO G 200ML Support: 0.006657789613848202 Confidence: 0.2054794520547945 Lift: 3.149287112105116 ===================================== Rule: CROISSANT -> CAFE CATUAI VERMELHO G 200ML Support: 0.005326231691078562 Confidence: 0.3076923076923077 Lift: 5.590570719602978 ===================================== Rule: CROISSANT -> PAO SOURDOUGH OVOS Support: 0.005326231691078562 Confidence: 0.3076923076923077 Lift: 4.71585557299843 ===================================== Rule: CROISSANT -> SANDUICHE DE PEPERONI BRIE Support: 0.006213936972924989 Confidence: 0.3783783783783784 Lift: 3.1928332827209234 =====================================
rule_list_manha = []
for item in association_results_manha:
# first index of the inner list
# Contains base item and add item
pair = item[0]
items = [x for x in pair]
rule_list_manha.append ( items[0] + " -> " + items[1])
support_list_manha =[]
for item in association_results_manha:
# first index of the inner list
# Contains base item and add item
pair = item[0]
#items = [x for x in pair]
#second index of the inner list
support_list_manha.append( str(item[1]))
confidence_list_manha =[]
for item in association_results_manha:
# first index of the inner list
# Contains base item and add item
pair = item[0]
items = [x for x in pair]
#second index of the inner list
confidence_list_manha.append(str(item[2][0][2]))
lift_list_manha =[]
for item in association_results_manha:
# first index of the inner list
# Contains base item and add item
pair = item[0]
items = [x for x in pair]
#second index of the inner list
lift_list_manha.append(str(item[2][0][3]))
association_results_manha_df=pd.DataFrame(list
(zip(rule_list_manha, support_list_manha, confidence_list_manha, lift_list_manha)), columns=['Rule','Support', 'Confidence', 'Lift'])
association_results_manha_df.sort_values(by=['Support'],ascending=False)
| Rule | Support | Confidence | Lift | |
|---|---|---|---|---|
| 5 | PAO SOURDOUGH OVOS -> BOWL DE IOGURTE FRUTAS A... | 0.024411895250776743 | 0.2972972972972973 | 3.9170222854433385 |
| 2 | CROISSANT -> ADICIONAL DE MUSSARELA | 0.015090989791389259 | 0.918918918918919 | 3.344627341396324 |
| 3 | CROISSANT -> ADICIONAL DE PRESUNTO | 0.013315579227696404 | 0.9090909090909091 | 3.3088559259803203 |
| 6 | SELECAO DE PAES GELEIA -> BOWL DE IOGURTE FRUT... | 0.010208610741233911 | 0.2674418604651163 | 3.2570081709616594 |
| 0 | ADICIONAL DE MUSSARELA -> ADICIONAL DE PRESUNTO | 0.009764758100310697 | 0.5945945945945946 | 40.5945945945946 |
| 16 | CROISSANT -> ADICIONAL DE MUSSARELA | 0.009320905459387484 | 0.5675675675675677 | 42.624324324324334 |
| 10 | CROISSANT BRIE PARMA MEL DE -> SUCO DE LARANJA... | 0.00887705281846427 | 0.22222222222222224 | 6.032128514056224 |
| 14 | SUCO DE LARANJA 400ML -> CROISSANT OVOS BACON MEL | 0.008433200177541057 | 0.2289156626506024 | 5.157469879518072 |
| 12 | CROISSANT IFOOD -> SOURDOUGH 369G IFOOD | 0.008433200177541057 | 0.59375 | 7.309938524590163 |
| 11 | CROISSANT IFOOD -> PASTEL DE NATA 60G IFOOD | 0.008433200177541057 | 0.5428571428571428 | 6.683372365339578 |
| 9 | CROISSANT BRIE PARMA MEL DE -> CROISSANT OVOS ... | 0.007989347536617843 | 0.2 | 4.506 |
| 7 | CAFE CATUAI VERMELHO C/LEITE G -> LARANJA 400 ML | 0.007989347536617843 | 0.23684210526315788 | 3.629967776584318 |
| 4 | CROISSANT BRIE PARMA MEL DE -> AGUA MINERAL SE... | 0.007545494895694629 | 0.2 | 5.006666666666667 |
| 15 | MINI CROISSANT PAIN DE CHOC -> MINI CROISSANT ... | 0.007101642254771416 | 0.2857142857142857 | 5.646616541353383 |
| 13 | CROISSANT MUSSATRELA DE -> SUCO DE LARANJA 400ML | 0.006657789613848202 | 0.2459016393442623 | 6.674896306537625 |
| 17 | CROISSANT -> CAFE CATUAI VERMELHO G 200ML | 0.006657789613848202 | 0.2054794520547945 | 3.354675402025015 |
| 19 | CROISSANT -> CAFE CATUAI VERMELHO G 200ML | 0.006657789613848202 | 0.2054794520547945 | 3.149287112105116 |
| 18 | CROISSANT -> LARANJA 400 ML | 0.006213936972924989 | 0.29166666666666663 | 4.470238095238095 |
| 22 | CROISSANT -> SANDUICHE DE PEPERONI BRIE | 0.006213936972924989 | 0.3783783783783784 | 3.1928332827209234 |
| 8 | CROISSANT IFOOD -> COOKIES 80G IFOOD | 0.005770084332001775 | 0.619047619047619 | 7.621389539422324 |
| 1 | ADICIONAL DE MUSSARELA -> AGUA MINERAL S/GAS 5... | 0.005326231691078562 | 0.3243243243243244 | 5.294947121034079 |
| 20 | CROISSANT -> CAFE CATUAI VERMELHO G 200ML | 0.005326231691078562 | 0.3076923076923077 | 5.590570719602978 |
| 21 | CROISSANT -> PAO SOURDOUGH OVOS | 0.005326231691078562 | 0.3076923076923077 | 4.71585557299843 |
def apriori_results_method_2(df = transactions_filtrada, min_support = 0.005, hora_do_dia = None, dia_da_semana = None,
min_threshold = 1, metric = 'lift'):
"""
Takes in a data frame and returns the results of the Apriori algorithm using the mlxtend Python package.
"""
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
if hora_do_dia:
df = df[df.hora_do_dia == hora_do_dia]
if dia_da_semana:
df = df[df.tipo_de_dia == dia_da_semana]
dummies_df = df.groupby(['venda', 'produto'])['produto'].count().unstack().\
reset_index().fillna(0).set_index('venda')
def encode_units(x):
if x <= 0:
return 0
if x >= 1:
return 1
dummies_df = dummies_df.applymap(encode_units)
frequent_itemsets = apriori(dummies_df.astype('bool'), min_support= min_support, use_colnames=True)
# DeprecationWarning: DataFrames with non-bool types result in worse computationalperformance and their support might be discontinued in the future.Please use a DataFrame with bool type"
# https://stackoverflow.com/questions/74114745/how-to-fix-deprecationwarning-dataframes-with-non-bool-types-result-in-worse-c
rules = association_rules(frequent_itemsets, metric=metric, min_threshold=min_threshold)
rules.drop_duplicates(subset='leverage', keep='first', inplace=True)
rules = rules.sort_values(by='lift', ascending=False)
return rules
all_day_method_2 = pd.DataFrame(apriori_results_method_2()).reset_index()
del all_day_method_2["index"]
#the first two columns when converted to the dataframe are in the "Frozenset" format, and turn out a error message, when run Plotly to plot the dataframe into a table
cols_to_check = ['antecedents','consequents']
all_day_method_2['antecedents']= all_day_method_2['antecedents'].apply(set) # converting the column from Frozenset to set
all_day_method_2['consequents']= all_day_method_2['consequents'].apply(set) # converting the column from Frozenset to set
all_day_method_2[cols_to_check]=all_day_method_2[cols_to_check].astype('string') # converting the two columns into string types
all_day_method_2[cols_to_check] = all_day_method_2[cols_to_check].replace({"{":""}, regex=True) # deleting the symbols in the columns
all_day_method_2[cols_to_check] = all_day_method_2[cols_to_check].replace({"'":""}, regex=True)
all_day_method_2[cols_to_check] = all_day_method_2[cols_to_check].replace({"}":""}, regex=True)
all_day_method_2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 60 entries, 0 to 59 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 antecedents 60 non-null string 1 consequents 60 non-null string 2 antecedent support 60 non-null float64 3 consequent support 60 non-null float64 4 support 60 non-null float64 5 confidence 60 non-null float64 6 lift 60 non-null float64 7 leverage 60 non-null float64 8 conviction 60 non-null float64 dtypes: float64(7), string(2) memory usage: 4.3 KB
all_day_method_2.head()
| antecedents | consequents | antecedent support | consequent support | support | confidence | lift | leverage | conviction | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | CROISSANT, ADICIONAL DE MUSSARELA | ADICIONAL DE PRESUNTO | 0.009395 | 0.007634 | 0.005637 | 0.600000 | 78.600000 | 0.005565 | 2.480916 |
| 1 | CROISSANT, ADICIONAL DE PRESUNTO | ADICIONAL DE MUSSARELA | 0.006929 | 0.011274 | 0.005637 | 0.813559 | 72.161017 | 0.005559 | 5.303166 |
| 2 | ADICIONAL DE MUSSARELA | ADICIONAL DE PRESUNTO | 0.011274 | 0.007634 | 0.006107 | 0.541667 | 70.958333 | 0.006021 | 2.165163 |
| 3 | CROISSANT IFOOD | PASTEL DE NATA 60G IFOOD | 0.046389 | 0.011861 | 0.005167 | 0.111392 | 9.391152 | 0.004617 | 1.112008 |
| 4 | PAO SOURDOUGH OVOS | BOWL DE IOGURTE FRUTAS AVEIA | 0.027246 | 0.031709 | 0.007868 | 0.288793 | 9.107679 | 0.007005 | 1.361476 |
fig_apriori_m2= go.Figure(
data=
[
go.Table
(
header=dict
(
values=list
([
"antecedents","consequents","antecedent support","consequent support","support","confidence","lift","leverage", "conviction"
]),
font=dict(color='white', size=12), # configurando a cor e tamanho da letra
fill_color='seagreen',
align='left'
),
cells=dict
(
values=
[
all_day_method_2['antecedents'],
all_day_method_2['consequents'],
all_day_method_2['antecedent support'],
all_day_method_2['consequent support'],
all_day_method_2['support'],
all_day_method_2['confidence'],
all_day_method_2['lift'],
all_day_method_2['leverage'],
all_day_method_2['conviction'],
],
fill_color='lightcyan',
align='left'
)
)
]
)
fig_apriori_m2.update_layout(
title={
'text': "Aplicando o algoritmo Apriori - Modelo 02 - 2022 (Fev -Out)",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'})
py.plot(fig_apriori_m2, filename='/home/claudio/results/grafico20 - Apriori_Method_02_all_Day.html')
'/home/claudio/results/grafico20 - Apriori_Method_02_all_Day.html'
morning_method_2 = apriori_results_method_2(hora_do_dia = 'manhã')
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001 /usr/share/atom/atom: bad option: --pid=669
expr: syntax error: missing argument after ‘8’
afternoon_method_2 = apriori_results_method_2(hora_do_dia = 'tarde')
evening_method_2 = apriori_results_method_2(hora_do_dia = 'noite')
weekend_method_2 = apriori_results_method_2(dia_da_semana = 'FDS')
weekday_method_2 = apriori_results_method_2(dia_da_semana = 'DDS')
weekend_morning_method_2 = apriori_results_method_2(dia_da_semana = 'FDS', hora_do_dia = 'manhã')
weekday_afternoon_method_2 = apriori_results_method_2(dia_da_semana = 'DDS', hora_do_dia = 'tarde')
weekday_afternoon_method_2.groupby(by='antecedents')['lift'].sum().sort_values(ascending=False)
antecedents (CROISSANT, ADICIONAL DE MUSSARELA) 132.319444 (CROISSANT, ADICIONAL DE PRESUNTO) 123.727273 (ADICIONAL DE MUSSARELA) 123.727273 (CROISSANT) 37.912645 (PASTEL DE NATA 60G) 27.373780 (CROISSANT IFOOD) 13.610000 (COOKIES 80G) 12.801238 (PASTEL DE NATA 60G, BOMBOLONI NUTELLA 80G) 11.864259 (FOLHADO FRANGO 160G) 11.376274 (FOLHADO FRANGO 160G, PASTEL DE NATA 60G) 10.807043 (BOMBOLONI NUTELLA 80G) 8.332997 (SOURDOUGH AZEITONA E ALECRIM) 7.995589 (COCA COLA LATA 350 ML) 6.903623 (CROISSANT TRADICIONAL 80 G) 5.705271 (CAFE EQUILIBRADO CAPSULA P) 5.242681 (FOLHADO MACA 80G) 5.070133 (CROISSANT AMENDOAS 100G) 4.930859 (FOLHADO FRANGO 160G, BOMBOLONI NUTELLA 80G) 4.472770 (MINI CROISSANT PAIN DE CHOC) 4.455655 (CROISSANT, PASTEL DE NATA 60G) 3.434391 (CROISSANT, FOLHADO FRANGO 160G) 2.354089 (PIZZA AL TAGLIO GORGONZOLA E) 2.223856 (MINI CROISSANT 25G KIT C 5) 1.972464 (MINI CROISSANT 25G) 1.825620 (SOURDOUGH SEMI INTEGRAL 370G) 1.779085 Name: lift, dtype: float64